## REPLICATION FILE FOR DE LA CUESTA AND IMAI (2016)

# This file will produce all results and corresponding figures from the paper
# Note: you will need to run the preamble as the analysis uses several
#       custom functions and functions from packages. 

setwd("D:/Dropbox/Research Projects/RD ARPS/replication archive") ## Set directory

###################### LOAD REQUIRED PACKAGES AND FUNCTIONS ########################

packages <- c("lmtest", "sandwich", "rdrobust", "stargazer", 
              "mgcv", "foreign", "plotrix")
lapply(packages, require, character.only = T)
source("functions.R") ## Load required functions from functions.R file



################################################################################################
######################## FIGURE 1: FORCING SLOPE ###############################################
################################################################################################


##### Var choice: # Prv D Terms, D Prop Campaign Spending, % Voter Turnout ##############
d <- read.dta("RDReplication.dta", convert.factors = F) ## Load original dataset
d <- d[d$Use == 1 & is.na(d$DifDPct) == F 
       & is.na(d$DifDPNxt) == F & is.na(d$DifDPPrv) == F, ] ## Subset to observations used by C&S
c.vars <- read.csv("var_correspondence.csv", header = T, stringsAsFactors = F) ## Create list of variables to use from correspondence table:
lower = -0.075; upper = abs(lower); window.left = -0.02; window.right = abs(window.left); cutpoint = 0
d$DifDPct <- d$DifDPct/100
d$DDonaPct <- d$DDonaPct/100
d$DSpndPct <- d$DSpndPct/100

pdf("illustration_plot.pdf", height = 7, width = 14)
par(mfrow = c(1, 2),
    mar = c(5, 5, 5, 5),
    oma = c(2, 3, 2, 2))

###################### Main plots ########################################################
c.axis = 1.4; title.size = 1.4; c.lab = 1.4; c.point = 1.4; cutpoint = 0 ## Points/other size parameters
var.name = "DExpAdv" ## First variable
rdplot(d[, var.name], d$DifDPct, c = cutpoint, x.label = "Democratic Margin",
       x.lim = c(lower, upper), y.label = "Candidate Experience",
       title = "Democratic Experience Advantage",
       cex.main = title.size, cex.axis = c.axis, cex.lab = c.lab, cex = c.point)
left.mean <- mean(d[d$DifDPct < cutpoint & d$DifDPct > window.left, var.name], na.rm = T)
right.mean <- mean(d[d$DifDPct > cutpoint & d$DifDPct < window.right, var.name], na.rm = T)
segments(x0 = window.left, x1 = cutpoint, y0 = left.mean, y1 = left.mean, col = "red", lty = 2, lwd = 2)
segments(x0 = cutpoint, x1 = window.right, y0 = right.mean, y1 = right.mean, col = "red", lty = 2, lwd = 2)
abline(v = window.left, lty = 3); abline(v = window.right, lty = 3)

var.name = "DSpndPct" ## Second variable
rdplot(d[, var.name], d$DifDPct, c = cutpoint, x.label = "Democratic Margin",
       x.lim = c(lower, upper), y.label = "Percent of Total Spending",
       title = "Percent of Total Election Spending", 
       cex.main = title.size, cex.axis = c.axis, cex.lab = c.lab, cex = c.point) 
left.mean <- mean(d[d$DifDPct < cutpoint & d$DifDPct > window.left, var.name], na.rm = T)
right.mean <- mean(d[d$DifDPct > cutpoint & d$DifDPct < window.right, var.name], na.rm = T)
segments(x0 = window.left, x1 = cutpoint, y0 = left.mean, y1 = left.mean, col = "red", lty = 2, lwd = 2)
segments(x0 = cutpoint, x1 = window.right, y0 = right.mean, y1 = right.mean, col = "red", lty = 2, lwd = 2)
abline(v = window.left, lty = 3); abline(v = window.right, lty = 3)
dev.off()


#############################################################################################################
################################### FIGURE 2: ANALYSIS ######################################################
#############################################################################################################

c <- read.dta("RDReplication.dta", convert.factors = F) ## Load original dataset
c <- c[c$Use == 1 & is.na(c$DifDPct) == F 
       & is.na(c$DifDPNxt) == F & is.na(c$DifDPPrv) == F, ] ## Subset to observations used by C&S
                                                            ## And with non-missing t - 1 and t + 1 margin  
c.vars <- read.csv("var_correspondence.csv", header = T, stringsAsFactors = F) ## Create list of variables to use from correspondence table:
vars <- paste(c.vars$name, sep = "")
vars[which(vars %in% colnames(c) == F)] ## Check to make sure all vars in here/correct names
c[, vars[c.vars$type == 0]] <- scale(c[, vars[c.vars$type == 0]], center = T, scale = T) ## Standardize all non-binary covariates for easier comparison
c$DifDPct <- c$DifDPct/100 ## Change scale to unit scale
cut <- 0 ## Specify cut point 
which.results <- 3 ## Sets which values to pull from rdrobust object 
                   ## (1 = convenetional, 2 = bias corrected, 3 = robust), not implemented currently
c.axis = 1.4; title.size = 1.4; c.lab = 1.4; c.point = 1.4 ## Points/other size parameters
w = 14; h = 8 ## Specify height and width for figures 

################################# SPECIFY WINDOWS TO LOOP OVER AND BEGIN LOOP ###########################

windows <- c(-0.02, -0.005)
tables.names <- c("table", "table_half")

for(j in 1:length(windows)){
window.left <- windows[j]; window.right <- abs(window.left) ## Set window to use for comparison
table.suffix <- tables.names[j]

##################################### T-tests for each covariate ########################################

diff.table <- matrix(data = NA, nrow = length(vars), ncol = 5)
colnames(diff.table) <- c("Estimate", "Lower 95% CI", "Upper 95% CI", "Z-Statistic", "P-Value")
rownames(diff.table) <- c.vars$label
for(i in 1:length(vars)){ ## Loop over list
  c.sub <- na.omit(c[, c(vars[i], "DifDPct")]) ## Take only complete observations for DV + loop covariate
  c.sub$d.share <- c.sub$DifDPct ## Rename to match code
  c.sub$dw <- c.sub[, 1] ## dw = covariate of interest
  fit.close <- subset(c.sub, d.share >= window.left & d.share <= window.right, ) ## Subset data to window specified at script start
  test <- t.test(fit.close[fit.close$d.share >= cut, "dw"], 
                 fit.close[fit.close$d.share < cut, "dw"]) ## T-Test: Note that this takes form Right of CP - Left of CP
  diff.table[i, ] <- c(test$estimate[1] - test$estimate[2], test$conf.int[1], 
                       test$conf.int[2], test$statistic, test$p.value)} ## Diff, T-Stat, Lower CI, Upper CI, P-Val and close loop
diff.table <- as.data.frame(diff.table)

##################################### Regression Estimates ###############################################
require(mgcv); require(sandwich)
reg.table <- matrix(data = NA, nrow = length(vars), ncol = 5)
colnames(reg.table) <- c("Estimate", "Lower 95% CI", "Upper 95% CI", "Z-Statistic", "P-Value")
rownames(reg.table) <- c.vars$label
estimate <- rep(NA, length(vars)); ci.lower <- rep(NA, length(vars)); ci.upper <- rep(NA, length(vars))
z.regression <- rep(NA, length(vars)); p.regression <- rep(NA, length(vars));
slope.left <- p.regression <- rep(NA, length(vars)); slope.right <- p.regression <- rep(NA, length(vars))
for(i in 1:length(vars)){
  c.sub <- na.omit(c[, c(vars[i], "DifDPct")])
  c.sub$d.share <- c.sub$DifDPct
  c.sub$dw <- c.sub[, vars[i]]
  fit.close <- subset(c.sub, d.share >= window.left & d.share <= window.right)
  fit.low <- subset(c.sub, d.share >= window.left & d.share < cut)## For left-hand side subset
  lm.low <- lm(dw ~ d.share , data = fit.low)
  slope.left[i] <- summary(lm.low)$coefficients[2, 3]
  fit.high <- subset(c.sub, d.share >= cut & d.share <= window.right) ## For right-hand side subset
  lm.high <- lm(dw ~ d.share, data = fit.high)
  slope.right[i] <- summary(lm.high)$coefficients[2, 3]
  estimate <- (coef(lm.high)[1] + coef(lm.high)[2] * cut) - (coef(lm.low)[1] + coef(lm.low)[2] * cut) ## Estimate discontinuity
  new.obs <- data.frame(d.share = cut) ## New dataset with d.share set to cutpoint
  se.ctrl <- sqrt(vcovHC(lm.low, type = "HC3")[1, 1] 
                  + new.obs^2 * vcovHC(lm.low, type = "HC3")[2, 2] 
                  + 2 * new.obs * vcovHC(lm.low, type = "HC3")[2, 1])
  se.treat <- sqrt(vcovHC(lm.high, type = "HC3")[1, 1] 
                   + new.obs^2 * vcovHC(lm.high, type = "HC3")[2, 2] 
                   + 2 * new.obs * vcovHC(lm.high, type = "HC3")[2, 1])
  se.estimate <- sqrt(se.ctrl^2 + se.treat^2)
  ci.lower <- estimate - se.estimate * qnorm(0.975)
  ci.upper <- estimate + se.estimate * qnorm(0.975)
  z.regression <- estimate /se.estimate
  p.regression <- 2*pnorm(abs(estimate /se.estimate), lower.tail = F)
  reg.table[i, ] <- c(estimate, ci.lower, ci.upper, z.regression, p.regression)}
reg.table <- as.data.frame(reg.table)
reg.table$"Left Slope T" <- slope.left; reg.table$"Right Slope T" <- slope.right

################################################# LOCAL LINEAR REGRESSION ################################

### CCT BANDWIDTH

require(rdrobust)
rd.table <- matrix(data = NA, nrow = length(vars), ncol = 10)
rownames(rd.table) <- c.vars$label
colnames(rd.table) <- c("Estimate", "Lower 95% CI", "Upper 95% CI", "Z-Statistic", 
                        "Raw Obs", "Obs Left", "Obs Right", "Bandwidth", "Window", "P-Value")
for(i in 1:length(vars)){
  c.comp <- na.omit(c[, c(paste(vars[i]), "DifDPct")])
  est.rd <- rdrobust(y = c.comp[, 1], x = c.comp[, 2], c = cut, all = T)
  rd.table[i, ] <- c(est.rd$coef[3], est.rd$ci[3, ], est.rd$z[3], as.numeric(est.rd$tabl1.str[1]), ## Note, this takes robust, bias-corrected estimates
                     as.numeric(est.rd$tabl2.str[1, 1]), as.numeric(est.rd$tabl2.str[1, 2]), 
                     as.numeric(est.rd$tabl2.str[4, 1]), cut - window.left, est.rd$pv[3])}
rd.table <- as.data.frame(rd.table)

############################## WRITE RESULTS TO CSV TO USE IN FIGURES ####################################

write.csv(diff.table, paste("diff.", paste(table.suffix), ".csv", sep = "")) ## Print all the tables created to so far in .csv form:
write.csv(reg.table, paste("reg.", paste(table.suffix), ".csv", sep = ""))
write.csv(rd.table, paste("rd.", paste(table.suffix), ".csv", sep = ""))

} ## End loop

###########################################################################################################
############################## FIGURE 2: VISUALIZATION ####################################################
###########################################################################################################

############################# LOAD DATA ######################################

two.diff <- read.csv("diff.table.csv", header = T)[4:28, ] ## two
two.diff <- two.diff[order(two.diff$Estimate, decreasing = F), ]
two.reg <- read.csv("reg.table.csv", header = T)[4:28, ] ## two
two.reg <- two.reg[order(two.reg$Estimate, decreasing = F), ]
two.rd <- read.csv("rd.table.csv", header = T)[4:28, ] ## two
two.rd <- two.rd[order(two.rd$Estimate, decreasing = F), ]
half.diff <- read.csv("diff.table_half.csv", header = T)[4:28, ] ## Half
half.diff <- half.diff[c(rownames(two.diff)), ]
half.reg <- read.csv("reg.table_half.csv", header = T)[4:28, ] ## Half
half.reg <- half.reg[c(rownames(two.reg)), ]

############## DIFFERENCE FIGURE #############################################
c.axis = 1.4; title.size = 1.4; c.lab = .9; c.names = 0.7 ## Points/other size parameters
h = 8; w = 14; spacing <- 1.5 ## Spacing parameter
x.low <- -1.7; x.high <- abs(x.low) ## xlim values
pos.figures <- x.low + .075

################################# Begin Plot #################################
pdf("approach_comparison.pdf", width = w, height = h) ## Begin PDF
par(mfrow = c(1, 3))

## Differences
x <- seq(from = 1.5, to = 3*(nrow(half.diff)-1) + 1.5, by = 3)
points.below <- x - .5; points.above <- x + .5
names(x) <- half.diff$X
par(mar= c(5.1,9.1,4.1,.5)) ## Margin specification for proper presentation
plot(x = half.diff$"Estimate", y = x, type = "n", ## Note this is an empty plot
     xlim = c(x.low, x.high), ylab = "", bty = "n", yaxt = "n", cex.lab = c.lab,
     xlab = "Estimated Discontinuity (Standard Deviation Units for Non-Binary Measures)", pch = 19, 
     main = "Difference-in-means in window", cex.main = title.size)
poly.seq <- x[seq(from = 1, to = length(x), by = 2)] ## Take half the vector to create polygons
for(i in 1:length(poly.seq)){ ## Loop for each value in poly.seq
  polygon(x = c(x.low, x.low, x.high, x.high), ## Vertices of polygon
          y = c(poly.seq[i] - spacing, poly.seq[i] + spacing, 
                poly.seq[i] + spacing, poly.seq[i] - spacing), ## Width of polygon
          border = FALSE, col = 'lightgray')} ## Border (noshow), color and close loop
points(x = half.diff$"Estimate", y = points.below, cex = 0.75, pch = 1)
points(x = two.diff$"Estimate", y = points.above, cex = 0.75, pch = 19)
segments(x0 = half.diff[, 3], y0 = points.below, y1 = points.below, x1 = half.diff[, 4], 
         lty = "dashed") ## Specify line type
segments(x0 = two.diff[, 3], y0 = points.above, y1 = points.above, x1 = two.diff[, 4]) ## Specify line type
axis(2, pos = pos.figures, at = x, names(x), las =2, cex = c.names, tick = 0) ## Draw axis, subset labels to remove dvs
abline(v = 0, lty = 3)

## Regression
x <- seq(from = 1.5, to = 3*(nrow(half.reg)-1) + 1.5, by = 3)
points.below <- x - .5; points.above <- x + .5
names(x) <- half.reg$X
par(mar= c(5.1,9.1,4.1,2.1)) ## Margin specification for proper presentation
plot(x = half.reg$"Estimate", y = x, type = "n", ## Note this is an empty plot
     xlim = c(x.low, x.high), ylab = "", bty = "n", yaxt = "n", cex.lab = c.lab,
     xlab = "Estimated Discontinuity (Standard Deviation Units for Non-Binary Measures)", pch = 19, 
     main = "Linear regression in window", cex.main = title.size)
poly.seq <- x[seq(from = 1, to = length(x), by = 2)] ## Take half the vector to create polygons
for(i in 1:length(poly.seq)){ ## Loop for each value in poly.seq
  polygon(x = c(x.low, x.low, x.high, x.high), ## Vertices of polygon
          y = c(poly.seq[i] - spacing, poly.seq[i] + spacing, 
                poly.seq[i] + spacing, poly.seq[i] - spacing), ## Width of polygon
          border = FALSE, col = 'lightgray')} ## Border (noshow), color and close loop
points(x = half.reg$"Estimate", y = points.below, cex = 0.75, pch = 1)
points(x = two.reg$"Estimate", y = points.above, cex = 0.75, pch = 19)
segments(x0 = half.reg[, 3], y0 = points.below, y1 = points.below, x1 = half.reg[, 4], 
         lty = "dashed") ## Specify line type
segments(x0 = two.reg[, 3], y0 = points.above, y1 = points.above, x1 = two.reg[, 4]) ## Specify line type
axis(2, pos = pos.figures, at = x, names(x), las =2, cex = c.names, tick = 0) ## Draw axis, subset labels to remove dvs
abline(v = 0, lty = 3)

## LLR
x <- seq(from = 1.5, to = 3*(nrow(two.rd)-1) + 1.5, by = 3)
points.below <- x - .5; points.above <- x + .5
names(x) <- two.rd$X
par(mar= c(5.1,9.1,4.1,2.1)) ## Margin specification for proper presentation
plot(x = two.rd$"Estimate", y = x, type = "n", ## Note this is an empty plot
     xlim = c(x.low, x.high), ylab = "", bty = "n", yaxt = "n", cex.lab = c.lab,
     xlab = "Estimated Discontinuity (Standard Deviation Units for Non-Binary Measures)", pch = 19, 
     main = "Local linear regression", cex.main = title.size)
poly.seq <- x[seq(from = 1, to = length(x), by = 2)] ## Take half the vector to create polygons
for(i in 1:length(poly.seq)){ ## Loop for each value in poly.seq
  polygon(x = c(x.low, x.low, x.high, x.high), ## Vertices of polygon
          y = c(poly.seq[i] - spacing, poly.seq[i] + spacing, 
                poly.seq[i] + spacing, poly.seq[i] - spacing), ## Width of polygon
          border = FALSE, col = 'lightgray')} ## Border (noshow), color and close loop
points(x = two.rd$"Estimate", y = x, cex = 0.75, pch = 19)
segments(x0 = two.rd[, 3], y0 = x, y1 = x, x1 = two.rd[, 4]) ## Specify line type

axis(2, pos = pos.figures, at = x, names(x), las =2, cex = c.names, tick = 0) ## Draw axis, subset labels to remove dvs
abline(v = 0, lty = 3)
dev.off()


###################################################################################################
############################ FIGURE 3: RESIDUALS PLOT #############################################
###################################################################################################

############################# CONGRESSIONAL ELECTIONS #############################################

############################## LOAD IN AND MERGE DATA #############################################

c <- read.dta("RDReplication.dta", convert.factors = F) ## Load original dataset
c <- c[c$Use == 1 & is.na(c$DifDPct) == F 
       & is.na(c$DifDPNxt) == F & is.na(c$DifDPPrv) == F, ] ## Subset to observations used by C&S 
                                                            ## with non-missing t, t - 1 and t + 1 margin
state.names <- read.csv("state_names.csv", header = F)
colnames(state.names) <- c("state_caps", "state_long", "state")
state.names$StAlphCd <- 1:50; 
c <- merge(c, state.names, by = "StAlphCd")
c$state_year <- as.factor(paste(c$state, c$YearElec, sep = "-"))
m <- read.csv("state_merge.csv", header = T)
m.merge <- m[, c("state_year", "midterm", "norm_vote", "lag_norm_vote", "lag_norm_vote2")] ## Change dataset name and subset m dataset
c <- merge(c, m.merge, by = "state_year"); 
d <- c; d$rv <- d$DifDPct/100; d$lag_rv <- d$DifDPPrv/100; d$dv <- d$DifDPNxt/100
x.max <- .40 ## Set max margin considered as in Hainmueller
d <- d[abs(d$rv) < x.max, ] ## Subset according to Hain paper and rename

####################################### DEFINE VARIABLES AND ESTIMATE ##############################

controls <- c("lag_rv", "norm_vote", "lag_norm_vote", "lag_norm_vote2", "midterm")
iv <- c("rv"); dv <- c("dv")
eq.all <- make.equation(dv, c(iv, controls))
d.left <- na.omit(d[d$rv < 0 & d$rv > -.15, c(controls, iv, dv)]) 
d.right <- na.omit(d[d$rv > 0 & d$rv < .15, c(controls, iv, dv)]) ## Create data subsets left/right of 0
out.left <- lm(eq.all, data = d.left)
d.left$presid <- d.left$dv - as.matrix(model.frame(out.left)[, controls]) %*% as.matrix(coef(out.left)[controls]) 
rv.left <- lm(presid ~ rv, data = d.left)
out.right <- lm(eq.all, data = d.right)
d.right$presid <- d.right$dv - as.matrix(model.frame(out.right)[, controls]) %*% as.matrix(coef(out.right)[controls]) 
rv.right <- lm(presid ~ rv, data = d.right)
d.all <- rbind(d.left, d.right)

######################### PLOT RESULTS, LLR AND OLS ############################################

seq.left <- seq(from = -.15, to = 0, by = 0.01)
seq.right <- seq(from = 0, to = .15, by = 0.01)
rdrobust(y = d.all$presid, x = d.all$rv, c = 0)
coefs <- rdplot(y = d.all$presid, x = d.all$rv, c = 0, 
                y.lim = c(-.075, .075), hide = T)$coef
## Left side prediction
fit.left <- predict(rv.left, newdata = data.frame(rv = seq.left))
lwr.left <- c(out.left$coef[1] + out.left$coef[2]*seq.left
              - 1.96 * sqrt(vcov(out.left)[1, 1] + seq.left^2 * vcov(out.left)[2, 2] + 2 * seq.left * vcov(out.left)[2, 1]))
upr.left <- c(out.left$coef[1] + out.left$coef[2]*seq.left
              + 1.96 * sqrt(vcov(out.left)[1, 1] + seq.left^2 * vcov(out.left)[2, 2] + 2 * seq.left * vcov(out.left)[2, 1]))
ext.left <- predict(rv.left, newdata = data.frame(rv = seq.right))
## Right side prediction
fit.right <- predict(rv.right, newdata = data.frame(rv = seq.right))
lwr.right <- c(out.right$coef[1] + out.right$coef[2]*seq.right
               - 1.96 * sqrt(vcov(out.right)[1, 1] + seq.right^2 * vcov(out.right)[2, 2] + 2 * seq.right * vcov(out.right)[2, 1]))
upr.right <- c(out.right$coef[1] + out.right$coef[2]*seq.right
               + 1.96 * sqrt(vcov(out.right)[1, 1] + seq.right^2 * vcov(out.right)[2, 2] + 2 * seq.right * vcov(out.right)[2, 1]))
ext.right <- predict(rv.right, newdata = data.frame(rv = seq.left))
    ## Note, robust standard errors not used here

## Begin plot
c.axis = 1.2; title.size = 1.4; c.lab = 1.2; c.point = 1.2 ## Points/other size parameters
w = 12; h = 6; y.range <- c(-.9, 1.2); lwd.cis <- 2
y.name = "Partial Residual"; x.name = "Democratic Margin"
pdf("cia_extrapolation.pdf", height = h, width = w)
par(mfrow = c(1, 2))
rdplot.alt(y = d.all$presid, x = d.all$rv, c = 0, 
           col.lines = "red", y.lim = y.range,
           y.lab = y.name, x.lab = x.name,
           title = "House Elections, 1946-2012", 
           cex.main = title.size, cex = c.point, 
           cex.lab = c.lab, cex.axis = c.axis)
## Left-side observed and counterfactual CEF
lines(seq.left, fit.left, lwd = 2, col = "black")
lines(seq.right, ext.left, lwd = 2, lty = 2, col = "black")
lines(seq.left, lwr.left, lty = 3, lwd = lwd.cis)
lines(seq.left, upr.left, lty = 3, lwd = lwd.cis)
## Right-side observed and counterfactual CEF
lines(seq.right, fit.right, lwd = 2, col = "black")
lines(seq.left, ext.right, lwd = 2, lty = 2, col = "black")
lines(seq.right, lwr.right, lty = 3, lwd = lwd.cis)
lines(seq.right, upr.right, lty = 3, lwd = lwd.cis)


################################# State-Level ELECTIONS #########################################

load("statewide_analysis_Rep.RData") ## Load Hainmueller data
x$rv <- x$rv/100 ## Rescale to 0-1
x$dv <- x$dv/100
x$next_pct_D <- x$next_pct_D/100
x.max <- .40 ## Set max margin considered
d <- x[abs(x$rv) < x.max, ] ## Subset according to Hain paper and rename


######################## BASIC RESULTS, THREE COVARIATE SETS ###################################

controls <- c("lag", "norm_vote", "lag_norm_vote", "lag_norm_vote2", "midterm")
iv <- c("rv"); dv <- c("dv")

eq.all <- make.equation(dv, c(iv, controls))
d.left <- na.omit(d[d$rv < 0 & d$rv > -.15, c("state", "year", "office", controls, iv, dv)]) 
d.right <- na.omit(d[d$rv > 0 & d$rv < .15, c("state", "year", "office", controls, iv, dv)]) ## Create data subsets left/right of 0
out.left <- lm(eq.all, data = d.left)
d.left$presid <- d.left$dv - as.matrix(model.frame(out.left)[, controls]) %*% as.matrix(coef(out.left)[controls]) 
rv.left <- lm(presid ~ rv, data = d.left)
out.right <- lm(eq.all, data = d.right)
d.right$presid <- d.right$dv - as.matrix(model.frame(out.right)[, controls]) %*% as.matrix(coef(out.right)[controls]) 
rv.right <- lm(presid ~ rv, data = d.right)
d.all <- rbind(d.left, d.right)

######################### PLOT RESULTS, LLR AND OLS ############################################

seq.left <- seq(from = -.15, to = 0, by = 0.01)
seq.right <- seq(from = 0, to = .15, by = 0.01)
rdrobust(y = d.all$presid, x = d.all$rv, c = 0)
coefs <- rdplot(y = d.all$presid, x = d.all$rv, c = 0, 
                y.lim = c(-.075, .075), hide = T)$coef
## Left side prediction
fit.left <- predict(rv.left, newdata = data.frame(rv = seq.left))
lwr.left <- c(out.left$coef[1] + out.left$coef[2]*seq.left
              - 1.96 * sqrt(vcov(out.left)[1, 1] + seq.left^2 * vcov(out.left)[2, 2] + 2 * seq.left * vcov(out.left)[2, 1]))
upr.left <- c(out.left$coef[1] + out.left$coef[2]*seq.left
              + 1.96 * sqrt(vcov(out.left)[1, 1] + seq.left^2 * vcov(out.left)[2, 2] + 2 * seq.left * vcov(out.left)[2, 1]))
ext.left <- predict(rv.left, newdata = data.frame(rv = seq.right))
## Right side prediction
fit.right <- predict(rv.right, newdata = data.frame(rv = seq.right))
lwr.right <- c(out.right$coef[1] + out.right$coef[2]*seq.right
               - 1.96 * sqrt(vcov(out.right)[1, 1] + seq.right^2 * vcov(out.right)[2, 2] + 2 * seq.right * vcov(out.right)[2, 1]))
upr.right <- c(out.right$coef[1] + out.right$coef[2]*seq.right
               + 1.96 * sqrt(vcov(out.right)[1, 1] + seq.right^2 * vcov(out.right)[2, 2] + 2 * seq.right * vcov(out.right)[2, 1]))
ext.right <- predict(rv.right, newdata = data.frame(rv = seq.left))

## Begin plot
rdplot.alt(y = d.all$presid, x = d.all$rv, c = 0, 
           col.lines = "red", y.lim = y.range,
           y.lab = y.name, x.lab = x.name,
           title = "State-Level Offices, 1946-2012", 
           cex.main = title.size, cex = c.point, 
           cex.lab = c.lab, cex.axis = c.axis)
## Left-side observed and counterfactual CEF
lines(seq.left, fit.left, lwd = 2, col = "black")
lines(seq.right, ext.left, lwd = 2, lty = 2, col = "black")
lines(seq.left, lwr.left, lty = 3, lwd = lwd.cis)
lines(seq.left, upr.left, lty = 3, lwd = lwd.cis)
## Right-side observed and counterfactual CEF
lines(seq.right, fit.right, lwd = 2, col = "black")
lines(seq.left, ext.right, lwd = 2, lty = 2, col = "black")
lines(seq.right, lwr.right, lty = 3, lwd = lwd.cis)
lines(seq.right, upr.right, lty = 3, lwd = lwd.cis)
dev.off()


################################################################################################
################################################################################################
######################## FIGURE 4: CONDITIONAL INDEPENDENCE ####################################
################################################################################################
################################################################################################

########################## State-Level ELECTIONS ##################################################

load("statewide_analysis_Rep.RData") ## Load Hainmueller data
x.max <- 40 ## Set max margin considered as in Hainmueller
d <- x[abs(x$rv) < x.max, ] ## Subset according to Hain paper and rename

######################## BASIC RESULTS, THREE COVARIATE SETS ###################################

controls <- c("lag", "norm_vote", "lag_norm_vote", "lag_norm_vote2", "midterm")
iv <- c("rv"); dv <- c("dv")
eq.all <- make.equation(dv, c(iv, controls)) ## Fully specified model (a la Hainmueller)
eq.part <- make.equation(dv, c(iv, controls[-4])) ## Remove "midterm" var
eq.min <- make.equation(dv, c(iv, controls[c(1, 3)])) ## Remove t-2 share and t-2 norm vote
d.left <- na.omit(d[d$rv < 0, c(controls, iv, dv)]); d.right <- na.omit(d[d$rv > 0, c(controls, iv, dv)]) ## Create data subsets left/right of 0

######################## LEFT SIDE REGRESSIONS, FULL SPECIFICATION #############################

window <- seq(from = -x.max, to = 0 - 2, by = 2) ## Note scale of var is -100-100 here not, -1 to 1
est.left <- matrix(data = NA, nrow = length(window), ncol = 4)
for(i in 1:length(window)){
  out <- lm(eq.all, data = d.left[d.left$rv >= window[i], ])
  est.left[i, ] <- coeftest(out, vcov = vcovHC(out, type = "HC3"))[2, ]} ## HC3 robust standard errors
est.left <- as.data.frame(est.left)
colnames(est.left) <- c("Coefficient", "Std. Error", "T-Statistic", "P-Val")
rownames(est.left) <- round(window, digits = 2)

######################## RIGHT SIDE REGRESSIONS, FULL SPECIFICATION #############################

window <- seq(from = 0 + 2, to = x.max, by = 2) ## Note that "0" represents the cutpoint
est.right <- matrix(data = NA, nrow = length(window), ncol = 4)
for(i in 1:length(window)){
  out <- lm(eq.all, data = d.right[d.right$rv <= window[i], ])
  est.right[i, ] <- coeftest(out, vcov = vcovHC(out, type = "HC3"))[2, ]} ## HC3 robust standard errors
est.right <- as.data.frame(est.right)
colnames(est.right) <- c("Coefficient", "Std. Error", "T-Statistic", "P-Val")
rownames(est.right) <- round(window, digits = 2)
est.state <- rbind(est.left, est.right)


################################################################################################
################################################################################################
######################## CONDITIONAL INDEPENDENCE: POST-WAR HOUSE ##############################
################################################################################################
################################################################################################

c <- read.dta("RDReplication.dta", convert.factors = F) ## Load original dataset
c <- c[c$Use == 1 & is.na(c$DifDPct) == F 
       & is.na(c$DifDPNxt) == F & is.na(c$DifDPPrv) == F, ] ## Subset to observations used by C&S
state.names <- read.csv("state_names.csv", header = F)
colnames(state.names) <- c("state_caps", "state_long", "state")
state.names$StAlphCd <- 1:50; 
c <- merge(c, state.names, by = "StAlphCd")
c$state_year <- as.factor(paste(c$state, c$YearElec, sep = "-"))
m <- read.csv("state_merge.csv", header = T)
m.merge <- m[, c("state_year", "midterm", "norm_vote", "lag_norm_vote", "lag_norm_vote2")] ## Change dataset name and subset m dataset
c <- merge(c, m.merge, by = "state_year"); ## Merge on state_year and load dplyer package
d <- c; d$rv <- d$DifDPct; d$lag_rv <- d$DifDPPrv; d$dv <- d$DifDPNxt
x.max <- 40 ## Set max margin considered
d <- d[abs(d$rv) < x.max, ] ## Subset according to Hain paper and rename

######################## BASIC RESULTS, THREE COVARIATE SETS ###################################

controls <- c("lag_rv", "norm_vote", "lag_norm_vote", "lag_norm_vote2", "midterm")
iv <- c("rv"); dv <- c("dv")
eq.all <- make.equation(dv, c(iv, controls))
d.left <- na.omit(d[d$rv < 0, c(controls, iv, dv)]); 
d.right <- na.omit(d[d$rv > 0, c(controls, iv, dv)]) ## Create data subsets left/right of 0

######################## LEFT SIDE REGRESSIONS, FULL SPECIFICATION #############################

window <- seq(from = -x.max, to = 0 - 2, by = 2)
est.left <- matrix(data = NA, nrow = length(window), ncol = 4)
for(i in 1:length(window)){
  out <- lm(eq.all, data = d.left[d.left$rv >= window[i], ])
  est.left[i, ] <- coeftest(out, vcov = vcovHC(out, type = "HC3"))[2, ]} ## HC3 robust standard errors
est.left <- as.data.frame(est.left)
colnames(est.left) <- c("Coefficient", "Std. Error", "T-Statistic", "P-Val")
rownames(est.left) <- round(window, digits = 2)

######################## RIGHT SIDE REGRESSIONS, FULL SPECIFICATION #############################

window <- seq(from = 0 + 2, to = x.max, by = 2)
est.right <- matrix(data = NA, nrow = length(window), ncol = 4)
for(i in 1:length(window)){
  out <- lm(eq.all, data = d.right[d.right$rv <= window[i], ])
  est.right[i, ] <- coeftest(out, vcov = vcovHC(out, type = "HC3"))[2, ]} ## HC3 robust standard errors
est.right <- as.data.frame(est.right)
colnames(est.right) <- c("Coefficient", "Std. Error", "T-Statistic", "P-Val")
rownames(est.right) <- round(window, digits = 2)
est.congress <- rbind(est.left, est.right)

####################### VISUALIZE BOTH IN A SINGLE FIGURE ######################################
require(plotrix)
w = 12; h = 6
c.axis = 1.2; title.size = 1.4; c.lab = 1.2; c.point = 1.2 ## Points/other size parameters
x.lim <- c(-25, 25); y.lim = c(-9, 9)
pdf("cia_validity.pdf", width = w, height = h)
par(mfrow = c(1, 2))
plotCI(as.numeric(rownames(est.congress)), est.congress$"Coefficient",
       uiw = qnorm(0.975) * est.congress$"Std. Error", xaxt = "n",
       xlab = "Democratic Margin of Victory", pch = 19,
       ylab = "Esimated effect of forcing variable on outcome", 
       main = "House Elections, 1946-2012", 
       cex = c.point, cex.main = title.size, cex.axis = c.axis, 
       xlim = x.lim, ylim = y.lim)
axis(1, at = c(-20, -10, 0, 10, 20), labels = c(-20, -10, 0, 10, 20)/100, cex.axis = c.axis)
abline(h = 0, col = "red", lty = 2)
abline(v = 0, lty = 3)
plotCI(as.numeric(rownames(est.state)), est.state$"Coefficient",
       uiw =  qnorm(0.975) * est.congress$"Std. Error", xaxt = "n",
       xlab = "Democratic Margin of Victory", pch = 19,
       ylab = "Esimated effect of forcing variable on outcome", 
       main = "State-Level Offices, 1946-2012", 
       cex = c.point, cex.main = title.size, cex.axis = c.axis, 
       xlim = x.lim, ylim = y.lim)
abline(h = 0, col = "red", lty = 2)
abline(v = 0, lty = 3)
axis(1, at = c(-20, -10, 0, 10, 20), labels = c(-20, -10, 0, 10, 20)/100, cex.axis = c.axis)
### Note the axis command to change the scale of the variable to be consistent with rest of paper
dev.off()


